# Volha Charnysh 
# Explaining outgroup bias in weak states: Religion and legibility in the 1891-92 Russian famine” (World Politics)
# Replication for cross-sectional analysis
# R version 3.6.3 
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS High Sierra 10.13.6
# ----------

rm(list = ls())

### Install missing packages
ipak <- function(pkg){
   new.pkg <- pkg[!(pkg %in% installed.packages()[,"Package"])]
  if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
   sapply(pkg, require, character.only = TRUE)
}

packages <- c("foreign", "sandwich", "texreg", "corrplot", "qgraph", "stargazer", "factoextra", "psych", "lmtest", "arm", "sensemakr", "ggplot2", "visreg")

ipak(pkg = packages)


#####################################################################################################
##################### Figure 1. Religious demographics and mortality in provinces ###################
#####################################################################################################

### Religious Demographics and mortality in provinces affected by the famine ###

gub<-read.dta("Province_Mortality.dta")

p1 <- ggplot(data = gub, mapping = aes(x = ShareNonOrthodox1870, y = ExcessMortalityV1))+
  geom_point(alpha = 0.6, shape = 21, colour = "black", fill = "white", size = 2, stroke = 2) + 
  geom_smooth(method = "lm")+
  labs(y="Excess Famine Mortality", x="Share non-Orthodox")+
  theme(axis.title = element_text(size = (15)), axis.text = element_text(size = (14))) + theme_bw()

ggsave("Figure1.jpeg", p1, width = 8, height = 8)


######################################################################
##################### ANALYSIS USING DISTRICT DATA ###################
######################################################################
fam<-read.csv("DatasetMainMay2021.csv")

####### CREATE FUNCTIONS ########

#function to create formulas
formulas <- function(a, b, c) {
  formulas <- list()
  n <- 0
    for(k in c){
    for (i in a) {
    for (j in b) {
      n <- n + 1
    formulas[n] <- paste(i, j, k)
      }
    }
}
  return(formulas)
}


#function to run models
run_models <- function(formlist) {
  models <- list()
      n <- 0
    for (i in 1:length(formlist)){
      n <- n + 1
      models[[n]] <- eval(parse(text=formlist[[n]])) 
    }
  return(models)
}


#vcov function 
hc1<-function(x) vcovHC(x, type="HC1")  

#function to extract coefficients + calculate errors
run_coef <- function(output) {
 stdev <- list() 
      n <- 0
    for (i in 1:length(output)){
      n <- n + 1
     stdev[[n]] <- coeftest(output[[n]], vcov. = hc1)
    }
  return(stdev)    
}


#############################################
##########    PCA Analysis     ##############
#############################################

relief <- fam[c("onrelief_avg","LocalAidAllPuds","StateAidAllPuds","PudovNaEdokaMean","reliefmonths", "onset_mo")]

rownames(relief) <- interaction(fam$uezd_name,fam$ter_ID, sep = "-")
colnames(relief) <- c("Mean pop on relief", "Total local aid", "Total state aid", "Average bread loan",  "Months on relief", "Relief onset")
  
corrplot(cor(relief, use = "complete.obs"))

qgraph(cor(relief, use = "complete.obs"), layout = "spring")

relief_pca <- princomp(na.omit(relief), cor = TRUE, scores = TRUE)

summary(relief_pca)

#Table A.9 for the Appendix: variance explained by each component 
fviz_eig(relief_pca) #Plot loadings

ggsave("PrincipalComponent.pdf",fviz_eig(relief_pca), width=6, height=7)

relief_pca_nrot <- principal(na.omit(relief), nfactors = 2, rotate = "none")

relief_pca_scores <- relief_pca_nrot$scores 

loadings(relief_pca_nrot)

row.names(fam) <- interaction(fam$uezd_name,fam$ter_ID, sep = "-")


#### dataset for main analyses (with indicators from first principal component) ####
fam13 <- merge(fam, relief_pca_scores, by = "row.names", all.x = TRUE)


######################################################################
################# Table A.1: DESCRIPTIVE STATISTICS  #################
######################################################################

fam13$ReceivedKaz8890<-fam13$receivedRubKaz1888+fam13$receivedRubKaz1889+fam13$receivedRubKaz1890 

fam13$ReceivedKazZem8890<-(fam13$receivedRubKaz1888+fam13$receivedRubKaz1889+fam13$receivedRubKaz1890+fam13$OkladyRubZemReceived88 +fam13$OkladyRubZemReceived89 +fam13$OkladyRubZemReceived90)

fam13$TaxtoLand8890<-fam13$ReceivedKazZem8890/fam13$AreaPeasLand92 

fam13$LocalLoanPC <-fam13$LocalAidAllPuds/fam13$TotPop
fam13$StateLoanPC <-fam13$StateAidAllPuds/fam13$TotPop
fam13$AvgPopOnReliefPct <- fam13$OnReliefAvg/fam13$TotPop
fam13$PopDensity <- fam13$TotPop/fam13$ln_area
fam13$LnTotPop<-log(fam13$TotPop)
fam13$AvgRecrHeightM<-fam13$AvgRecrHeight/100

#Note on calculating harvest (this variable is already in the dataset): 
#fam13$ostatokSum1891<-fam13$OstatokRyeBoth1891 +fam13$OstatokWheatW1891+ fam13$OstatokWheatS1891+ fam13$OstatokOats1891
#fam13$ostatokpp2<-fam13$ostatokSum1891/fam13$TotPop
#ostatokpp2c<-ifelse(fam13$ostatokpp2>100, NA, fam13$ostatokpp2) #remove what must be typo in the original publication

vars<-c("ostatokpp2c","sh_NOTorthodox_1870", "sh_muslim_1870","sh_nonOrthNonMusl1870","sh_russian_1897","ShareTurk", "ShareNonRusNonTurk", "religfrac_1870","AvgCommuneLandPP", "AvgRecrHeightM",
        "railDist", "captPerArea", "distStPetKm", "nobilityper1000pop77", "serfperc1","blacksoil", "DvorsWithHorsesPct", "onset_mo","endrelief_mo","reliefmonths", 
        "PudovNaEdokaMean", "StateLoanPC", "LocalLoanPC", "AvgPopOnReliefPct", "grain.reserves.in.kg.head", "ZemFoodSupplyCapDummy", "TaxtoLand8890", "Myers.15_74", "AvgYields1883.87", "PC1", "TotPop")


stargazer(fam13[!is.na(fam13$StateLoanPC),which(names(fam13) %in% vars) ], title="Descriptive statistics", digits=2,summary.stat=c("n", "mean","sd",  "min", "max")
          , covariate.labels=c("Blacksoil", "Serfdom (share)", "Share non-Russian (1897)", "Religious fractionalization (1870)", "Share Muslim (1870)", "Relief onset (month)", "Distance to railway (km)", "Population", "Land captains per km2", 
                   "Share other Non-Orthodox (1870)","Distance to St. Petersburg (km)", "Share Non-Orthodox (1870)","Horses per household", "Noble Landowners per 1000 (1877)", "Average land allotment", 
                   "Myers Index", "Mean Food Loan (pud)","Months on relief", "Harvest in 1891 (pud)", "Grain reserves (kg)", "Relief termination (month)", "Share Turkic", "Share Other non-Russian", "Average Yield 1888-87", "Principal Component 1", "Taxes to land, 1888-1890", "Local aid per capita", "State aid per capita", "Average population on relief pc", "Average Recruit Height (M)"))


#########################################################
####     Ethnic demography and relief indicators    #####
####  Table 2 (main text) and Table A.11 (Appendix) #####
#########################################################

dv <- list("lm(PC1 ~ ostatokpp2c +", "lm(reliefmonths ~ ostatokpp2c +", "lm(onset_mo ~  ostatokpp2c +", "lm(PudovNaEdokaMean ~  ostatokpp2c +", "lm(onrelief_avg ~  ostatokpp2c +", "lm(log(StateAidAllPuds+1) ~  ostatokpp2c +", "lm(log(LocalAidAllPuds+1) ~  ostatokpp2c +")
identity <- list("sh_muslim_1870 + sh_nonOrthNonMusl1870", "sh_NOTorthodox_1870", "ShareTurk + ShareNonRusNonTurk", "religfrac_1870")
controls <- list("+ UrbanShare1883 + HarvestDrop2 + AvgCommuneLandPP + AvgRecrHeightM + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long  +LnTotPop + gub_name, data = fam13)")

reg.formula <- formulas(dv, identity, controls)
mod <- run_models(reg.formula)
mod.cl <- run_coef(mod)


MainPaper <- texreg(
  l = list(mod[[1]], mod[[5]], mod[[9]], mod[[13]], mod[[17]], mod[[21]]),
  override.se=list(mod.cl[[1]][,2], mod.cl[[5]][,2], mod.cl[[9]][,2], mod.cl[[13]][,2], mod.cl[[17]][,2], mod.cl[[21]][,2]),
  override.pval=list(mod.cl[[1]][,4], mod.cl[[5]][,4], mod.cl[[9]][,4], mod.cl[[13]][,4], mod.cl[[17]][,4], mod.cl[[21]][,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)","(5)","(6)"),
  custom.coef.names=c("Harvest pc 1891","Share Muslim","Share other non-Orthodox", "Harvest Drop"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:reliefresults",
  omit.coef = "(Intercept)|gub_name|area|lat|long|lnrailDist|captPerArea|AvgYields1883.87|AvgCommuneLandPP|UrbanShare1883|AvgRecrHeight|lndistStPetKm|serfperc1|DvorsWithHorsesPct|blacksoil|ln_area|TotPop|nobilityper1000pop77",
  include.rsquared = FALSE, 
  include.rmse=FALSE
)


## Appendix table A.11: linguistic clevagaes instead of religious cleavages: Turkic and non-Turkic other.
Appendix <- texreg(
  l = list(mod[[3]], mod[[7]], mod[[11]], mod[[15]], mod[[19]], mod[[23]]),
  override.se=list(mod.cl[[3]][,2], mod.cl[[7]][,2], mod.cl[[11]][,2], mod.cl[[15]][,2], mod.cl[[19]][,2], mod.cl[[23]][,2]),
  override.pval=list(mod.cl[[3]][,4], mod.cl[[7]][,4], mod.cl[[11]][,4], mod.cl[[15]][,4], mod.cl[[19]][,4], mod.cl[[23]][,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)","(5)","(6)"),
  custom.coef.names=c("Harvest pc 1891","Share Turkic", "Share non-Russian non-Turkic", "Harvest Drop"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:reliefLang",
  omit.coef = "(Intercept)|gub_name|area|lat|long|lnrailDist|captPerArea|AvgYields1883.87|AvgCommuneLandPP|UrbanShare1883|AvgRecrHeight|lndistStPetKm|serfperc1|DvorsWithHorsesPct|blacksoil|ln_area|TotPop|nobilityper1000pop77",
  include.rsquared = FALSE, 
  include.rmse=FALSE
)


#########################################################
########## Figure 5 (based on Table 2 above) ############
#########################################################

reg1as<-standardize(mod[[1]], unchanged="gub_name",standardize.y=TRUE)
reg2as<-standardize(mod[[5]], unchanged="gub_name",standardize.y=TRUE)
reg3as<-standardize(mod[[9]], unchanged="gub_name",standardize.y=TRUE)
reg4as<-standardize(mod[[13]], unchanged="gub_name",standardize.y=TRUE)
reg5as<-standardize(mod[[17]], unchanged="gub_name",standardize.y=TRUE)
reg6as<-standardize(mod[[21]], unchanged="gub_name",standardize.y=TRUE)


plot.outMuslim <- sapply(list(reg1as, reg2as,  reg3as, reg4as, reg5as, reg6as), function(x){
  coeftest(x, vcov.=hc1)[3,1:2] 
})
plot.outOther <- sapply(list(reg1as, reg2as,  reg3as, reg4as, reg5as, reg6as), function(x){ 
  coeftest(x, vcov.=hc1)[4,1:2] 
})

plot.outMuslim <- as.data.frame(t(plot.outMuslim))
plot.outOther <- as.data.frame(t(plot.outOther))

plot.outAllS <- rbind(plot.outMuslim, plot.outOther)

plot.outAllS$dv <- factor(rep(c("PC 1", "Months on Relief", "Relief Onset", "Mean Bread Loan","Average Pop on Relief", "Ln(State Aid+1)"), 2), 
                            levels=c("PC 1", "Months on Relief", "Relief Onset",  "Mean Bread Loan","Average Pop on Relief", "Ln(State Aid+1)")) 
plot.outAllS$iv <- c(rep("Share Muslim",6), rep("Share Other non-Orthodox", 6))
plot.outAllS$lb <- plot.outAllS$Estimate - 1.96*plot.outAllS$`Std. Error`
plot.outAllS$ub <- plot.outAllS$Estimate + 1.96*plot.outAllS$`Std. Error`


figure5 <- ggplot(
  data = plot.outAllS,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape =iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +scale_fill_grey(start = 0, end = .9)+
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), alpha = 0.75, position = position_dodge(width = 0.1), size=1) +
  scale_y_continuous(name = "Average partial effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Principal Component 1","Months on Relief",  "Relief Onset", "Mean Bread Loan","Average Pop on Relief", "Ln(State Aid+1)")) +
  theme_bw() +
  theme(
    text=element_text(size=12, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 12,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=12, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure5.jpg",figure5, width = 8, height = 6)


###############################################
####### FIGURE 6: SENSITIVITY ANALYSIS ########
###############################################

sensitivity <- sensemakr(mod[[1]], treatment = "sh_muslim_1870",
                         benchmark_covariates = c("ostatokpp2c", "HarvestDrop2", "lnrailDist", "serfperc1" ),
                         kd = 1, ky = 1, q = 0.5)

fig_sense_reg0a <- function(){
  par(oma = c(b=.3,r=1,t=.3,l=1), mar = c(b=.3, r=1, t=.3, l=1))
  ovb_contour_plot(sensitivity, sensitivity.of = "estimate", col.contour = "grey100", col.thr.line = "black", 
                   lim.y = 0.4, lim = 0.4, #estimate.threshold = 0, 
                   label.text = FALSE,
                   cex.label.text = 0.1, cex.lab=1,
                   round = 3)
  text(x = c(sensitivity$bounds[, 2], -0.01), 
       y = c(sensitivity$bounds[, 3], -0.01), 
       labels = c("Harvest pc\n1891",  "Harvest Drop", 
                  "Ln Distance to the Railway", "Serfdom", "Unadjusted"),
       cex = .8, adj = c(-0.1, 0.5))
  legend(0.15, 0.38, legend=c("Threshold for continuous confounder to reduce\nthe coefficient on Share Muslim by half"),
         col=c("black"), lty=c(2), cex=.9, lwd=1, bty = "n")
  legend(0.168, 0.35, legend=c("Influence of the observed covariate on the\noutcome and Share Muslim"),
         pch=23, col = "black", pt.bg = "red", cex=.9, pt.cex = 1, bty = "n")
}


#################################################################################
######## Figure 7: Scatterplots of legibility and religious demography ##########
#################################################################################

jpeg("Figure7.jpg", width = 1200, height = 600)
par(mfcol=c(1,2))
scatter.smooth(fam13$sh_muslim_1870, fam13$Myers.15_74, span=1/3, pch=20, xlab="Share Muslim", ylab="Myers Index")
scatter.smooth(fam13$sh_nonOrthNonMusl1870, fam13$TaxtoLand8890, span=1/3, pch=20, xlab="Share Muslim", ylab="Taxes collected per unit land, 1888-1890")


#################################################################################################################
##############   Table 3: Relationships between religious demography, legibility, and taxes   ###################
#################################################################################################################

dv.3a <- list("lm(Myers.15_74 ~")  
dv.3b<-list("lm(TaxtoLand8890 ~")
identity.3<-list("sh_muslim_1870 + sh_nonOrthNonMusl1870", "ShareTurk + ShareNonRusNonTurk", "Myers.15_74")
#note 1897 literacy is a covariate only for Myers Index DV.
controls.3a <- list("+ lndistStPetKm + UrbanShare1883 + serfperc1 + lnrailDist + mliteracy_1897 + AvgCommuneLandPP + AvgRecrHeight + AvgYields1883.87 + nobilityper1000pop77 + DvorsWithHorsesPct + blacksoil + ln_area + LnTotPop + lat*long + gub_name, data = fam13)")
controls.3b <- list("+ lndistStPetKm + UrbanShare1883 + serfperc1 + lnrailDist + AvgCommuneLandPP + AvgRecrHeight + AvgYields1883.87 + nobilityper1000pop77 + DvorsWithHorsesPct + blacksoil + ln_area + LnTotPop + lat*long + gub_name, data = fam13)")


reg.formula.3a <- formulas(dv.3a, identity.3[1:2], controls.3a)
reg.formula.3b <- formulas(dv.3b, identity.3, controls.3b)

mod.3 <- run_models(c(reg.formula.3a, reg.formula.3b))
mod.cl.3 <- run_coef(mod.3)

table.3 <- texreg(
  l = list(mod.3[[1]], mod.3[[2]], mod.3[[3]], mod.3[[4]], mod.3[[5]]),
  override.se=list(mod.cl.3[[1]][,2], mod.cl.3[[2]][,2], mod.cl.3[[3]][,2], mod.cl.3[[4]][,2], mod.cl.3[[5]][,2]),
  override.pval=list(mod.cl.3[[1]][,4], mod.cl.3[[2]][,4], mod.cl.3[[3]][,4], mod.cl.3[[4]][,4], mod.cl.3[[5]][,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)","(5)"),
  custom.coef.names=c("Share Muslim ","Share other non-Orthodox ", "Share Turkic", "Share other non-Russian",  "Myers Index"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:legibilitytax",
  omit.coef = "(Intercept)|gub_name|lat|long|ln_area|LnTotPop|AvgCommuneLandPP|AvgRecrHeight|AvgYields1883.87|nobilityper1000pop77|DvorsWithHorsesPct|blacksoil|lnrailDist|UrbanShare1883|lndistStPetKm|serfperc1|mliteracy_1897",
  include.rsquared = FALSE, 
  include.rmse=FALSE
) 


##########################################################
#############    Table 4: Panels A + B     ###############
##########################################################

#############################################
##############    Panel A      ##############
#############################################
dv.4A <- list("lm(PC1 ~", "lm(reliefmonths ~ ", "lm(onset_mo ~ ", "lm(PudovNaEdokaMean ~ ", "lm(onrelief_avg ~ ", "lm(log(StateAidAllPuds+1) ~", "lm(log(LocalAidAllPuds+1) ~ ")
iv.4A <- list("Myers.15_74")
controls.4A <- list("+ ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop + gub_name, data=fam13)")

#create and run formulas + get coefficients
reg.formula.4A <- formulas(dv.4A, iv.4A, controls.4A)
mod.4A <- run_models(reg.formula.4A)
mod.cl.4A <- run_coef(mod.4A)


Table.4A <- texreg(
  l = list(mod.4A[[1]], mod.4A[[2]], mod.4A[[3]], mod.4A[[4]], mod.4A[[5]],  mod.4A[[6]]),
  override.se=list(mod.cl.4A[[1]][,2], mod.cl.4A[[2]][,2], mod.cl.4A[[3]][,2], mod.cl.4A[[4]][,2], mod.cl.4A[[5]][,2], mod.cl.4A[[6]][,2]),
  override.pval=list(mod.cl.4A[[1]][,4], mod.cl.4A[[2]][,4], mod.cl.4A[[3]][,4], mod.cl.4A[[4]][,4], mod.cl.4A[[5]][,4], mod.cl.4A[[6]][,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)","(5)","(6)"),
  custom.coef.names=c("Myers Index (1897)", "Harvest pc 1891", "Harvest Drop"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:reliefSig",
  omit.coef = "(Intercept)|gub_name|ln_area|lat|long|lnrailDist|AvgCommuneLandPP|captPerArea|AvgYields1883.87|UrbanShare1883|lndistStPetKm|serfperc1|DvorsWithHorsesPct|blacksoil|TotPop|ln_area|nobilityper1000pop77",
  include.rsquared = FALSE, 
  include.rmse = FALSE
)

#############################################
##############    Panel B      ##############
#############################################

dv.4B <- list("lm(PC1 ~", "lm(reliefmonths ~ ", "lm(onset_mo ~ ", "lm(PudovNaEdokaMean ~ ", "lm(onrelief_avg ~ ", "lm(log(StateAidAllPuds+1) ~", "lm(log(LocalAidAllPuds+1) ~ ")
iv.4B <- list("TaxtoLand8890")
controls.4B <- list("+ ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop + gub_name, data=fam13)")

reg.formula.4B <- formulas(dv.4B, iv.4B, controls.4B)
mod.4B <- run_models(reg.formula.4B)
mod.cl.4B <- run_coef(mod.4B)

Table.4B <- texreg(
  l = list(mod.4B[[1]], mod.4B[[2]], mod.4B[[3]], mod.4B[[4]], mod.4B[[5]],  mod.4B[[6]]),
  override.se=list(mod.cl.4B[[1]][,2], mod.cl.4B[[2]][,2], mod.cl.4B[[3]][,2], mod.cl.4B[[4]][,2], mod.cl.4B[[5]][,2], mod.cl.4B[[6]][,2]),
  override.pval=list(mod.cl.4B[[1]][,4], mod.cl.4B[[2]][,4], mod.cl.4B[[3]][,4], mod.cl.4B[[4]][,4], mod.cl.4B[[5]][,4], mod.cl.4B[[6]][,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)","(5)","(6)"),
  custom.coef.names=c("Myers Index (1897)", "Harvest pc 1891", "Harvest Drop"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:reliefSig",
  omit.coef = "(Intercept)|gub_name|ln_area|lat|long|lnrailDist|AvgCommuneLandPP|captPerArea|AvgYields1883.87|UrbanShare1883|lndistStPetKm|serfperc1|DvorsWithHorsesPct|blacksoil|TotPop|ln_area|nobilityper1000pop77",
  include.rsquared = FALSE, 
  include.rmse = FALSE
)

###################################################################
####### Figure 8: Standardized coefficients on Myers Index ########
###################################################################

leg1s<-standardize(mod.4A[[1]], unchanged="gub_name", standardize.y=TRUE)
tax1s<-standardize(mod.4B[[1]], unchanged="gub_name", standardize.y=TRUE)
leg2s<-standardize(mod.4A[[2]], unchanged="gub_name", standardize.y=TRUE)
tax2s<-standardize(mod.4B[[2]], unchanged="gub_name", standardize.y=TRUE)
leg3s<-standardize(mod.4A[[3]], unchanged="gub_name", standardize.y=TRUE)
tax3s<-standardize(mod.4B[[3]], unchanged="gub_name", standardize.y=TRUE)
leg4s<-standardize(mod.4A[[4]], unchanged="gub_name", standardize.y=TRUE)
tax4s<-standardize(mod.4B[[4]], unchanged="gub_name", standardize.y=TRUE)
leg5s<-standardize(mod.4A[[5]], unchanged="gub_name", standardize.y=TRUE)
tax5s<-standardize(mod.4B[[5]], unchanged="gub_name", standardize.y=TRUE)
leg6s<-standardize(mod.4A[[6]], unchanged="gub_name", standardize.y=TRUE)
tax6s<-standardize(mod.4B[[6]], unchanged="gub_name", standardize.y=TRUE)


plot.out <- sapply(list(leg1s, tax1s, leg2s, tax2s,  leg3s, tax3s, leg4s, tax4s, leg5s, tax5s, leg6s, tax6s), function(x){
  coeftest(x, vcov.=hc1)[2,1:2]
})

plot.out <- as.data.frame(t(plot.out))
plot.out$dv <- factor(c(rep("PC 1", 2), rep("Months on Relief", 2), rep("Relief Onset", 2),  rep("Mean Bread Loan",2), rep("Average Pop on Relief",2),  rep("State Aid",2)), 
                      levels=c("PC 1", "Months on Relief","Relief Onset",  "Mean Bread Loan","Average Pop on Relief", "State Aid"))
plot.out$iv <- c(rep(c("Myers Index of age heaping", "Tax revenue per unit land"),6))
plot.out$lb <- plot.out$Estimate - 1.96*plot.out$`Std. Error`
plot.out$ub <- plot.out$Estimate + 1.96*plot.out$`Std. Error`

# Plot
figure8 <- ggplot(
  data = plot.out,
  mapping = aes(
    x = dv,
    y = Estimate,
    shape=iv
  )
) +
  geom_hline(yintercept = 0, colour = "grey") +
  geom_pointrange(mapping = aes(ymin = lb, ymax = ub), size=1, alpha = 0.75, position = position_dodge(width = 0.1)) +
  scale_y_continuous(name = "Average partial effect") +scale_shape_manual(values = c(2, 16))+
  scale_x_discrete(name = NULL, labels = c("Principal Component 1", "Months on Relief",  "Relief Onset", "Mean Bread Loan","Average Pop on Relief","Ln(State Aid+1)")) +
  theme_bw() +
  theme(
    text=element_text(size=13, family="sans", color="black"),
    legend.position = "bottom",
    legend.text = element_text(size = 13,family="sans"),
    axis.text.x = element_text(angle = 47, hjust = 1, size=13, family="sans", color="black"),
    legend.title = element_blank(),
    plot.margin = unit(c(.5,.5,.5,1), "cm")
  )

ggsave("Figure8.jpg", figure8, width = 8, height = 6)


#####################################################################################
##### Table A.12 Analysis on a subset with few Muslims; interaction effects #########
#####################################################################################

###### Alternative: non-Muslim districts ########
nonmuslim1 <- lm(PC1 ~ Myers.15_74 + ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop, data = subset(fam13, sh_muslim_1870<median(fam13$sh_muslim_1870)))

nonmuslim2 <- lm(PC1 ~ TaxtoLand8890 + ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop, data = subset(fam13, sh_muslim_1870<median(fam13$sh_muslim_1870)))

#interaction: 
muslim1 <- lm(PC1 ~ Myers.15_74*sh_muslim_1870 + sh_nonOrthNonMusl1870 + ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop + gub_name, data = fam13)

muslim2 <- lm(PC1 ~ TaxtoLand8890*sh_muslim_1870 + sh_nonOrthNonMusl1870 + ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + captPerArea + lndistStPetKm + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + LnTotPop + gub_name, data = fam13)


###plotting interaction effects: 

fam13$HighLegibility <- ifelse(fam13$Myers.15_74>mean(fam13$Myers.15_74), "Legibility below average", "Legibility above average") #Note higher Myers index - lower legibility

m1 <- lm(PC1 ~ sh_muslim_1870* HighLegibility + sh_nonOrthNonMusl1870 + ostatokpp2c + HarvestDrop2 + AvgCommuneLandPP + UrbanShare1883 + lnrailDist + lndistStPetKm + serfperc1 + DvorsWithHorsesPct +nobilityper1000pop77 +  blacksoil + ln_area + lat*long + LnTotPop + gub_name, data = fam13)

interaction1 <- visreg(m1, "sh_muslim_1870", by="HighLegibility", gg=TRUE, 
                       xlab="Share Muslim", ylab="Principal Component 1", fill.par=list(density = 40, angle = 90, col="grey"), 
                       line.par=list(col="black")) + theme_bw()
ggsave("interaction1.jpg",interaction1, width = 8, height = 6)


fam13$HighTax <- ifelse(fam13$TaxtoLand8890>mean(fam13$TaxtoLand8890, na.rm=TRUE), "Tax revenues above average", "Tax revenues below average")

m2 <- lm(PC1 ~ sh_muslim_1870*HighTax + sh_nonOrthNonMusl1870 + ostatokpp2c + AvgYields1883.87  + lnrailDist + lndistStPetKm + UrbanShare1883 + serfperc1 + DvorsWithHorsesPct +nobilityper1000pop77 +  blacksoil + ln_area + lat*long + LnTotPop + gub_name, data = fam13)

interaction2 <- visreg(m2, "sh_muslim_1870", by="HighTax", gg=TRUE, 
                       xlab="Share Muslim", ylab="Principal Component 1", fill.par=list(density = 40, angle = 90, col="grey"),
                       line.par=list(col="black")) + theme_bw()

ggsave("Interaction2.jpg",interaction2, width = 8, height = 6)

subsetMuslim <- texreg(
  l = list(nonmuslim1, nonmuslim2, muslim1, muslim2, m1, m2),
  override.se = list(coeftest(nonmuslim1, vcov.=hc1)[,2], coeftest(nonmuslim2, vcov.=hc1)[,2], coeftest(muslim1, vcov.=hc1)[,2], coeftest(muslim2, vcov.=hc1)[,2], coeftest(m1, vcov.=hc1)[,2], coeftest(m2, vcov.=hc1)[,2]),
  override.pval = list(coeftest(nonmuslim1, vcov.=hc1)[,4], coeftest(nonmuslim2, vcov.=hc1)[,4], coeftest(muslim1, vcov.=hc1)[,4], coeftest(muslim2, vcov.=hc1)[,4], coeftest(m1, vcov.=hc1)[,4], coeftest(m2, vcov.=hc1)[,4]),  
  custom.model.names = c("(1)","(2)","(3)","(4)", "(5)", "(6)"),
  custom.coef.names=c("Myers Index (1897)", "Harvest pc 1891", "Harvest Drop", "Tax revenue per unit land", "Share Muslim", "Share Other non-Orthodox", "Myers Index * Share Muslim", "Tax to Land * Share Muslim", 
  "Low legibility", "Low legibility*Share Muslim", "Low Tax", "Low Tax*Share Muslim"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:nomuslim.subset",
  omit.coef = "(Intercept)|gub_name|lat|long|ln_area|AvgYields1883.87|AvgCommuneLandPP|captPerArea|UrbanShare1883|lndistStPetKm|lnrailDist|serfperc1|DvorsWithHorsesPct|nobilityper1000pop77|blacksoil|LnTotPop",
  include.rsquared = FALSE, 
  include.rmse = FALSE                    
)

#########################################################
######## TABLE A.13: SOCIAL UNREST HYPOTHESIS  ##########
#########################################################

prot1 <- lm(protestsAll1880s ~ sh_muslim_1870 + sh_nonOrthNonMusl1870, data = fam13)
prot2 <- lm(protestsAll1880s ~ sh_muslim_1870 + sh_nonOrthNonMusl1870 + UrbanShare1883 + AvgCommuneLandPP + nobilityper1000pop77 + lndistStPetKm + AvgRecrHeight + serfperc1 + DvorsWithHorsesPct + blacksoil + ln_area + lat*long + gub_name, data = fam13)
prot3 <- lm(atot ~ sh_muslim_1870 + sh_nonOrthNonMusl1870 + UrbanShare1883 + AvgCommuneLandPP + nobilityper1000pop77 + lndistStPetKm + AvgRecrHeight + serfperc1 + DvorsWithHorsesPct + blacksoil + ln_area + lat*long + gub_name, data = fam13) 

myers1 <- lm(Myers.15_74 ~ protestsAll1880s + sh_muslim_1870 + sh_nonOrthNonMusl1870, data = fam13)
myers2 <- lm(Myers.15_74 ~ protestsAll1880s + lnrailDist + lndistStPetKm + AvgCommuneLandPP + mliteracy_1897 + UrbanShare1883 + AvgRecrHeight + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + gub_name, data = fam13)
myers3 <- lm(Myers.15_74 ~ atot + lnrailDist + lndistStPetKm + AvgCommuneLandPP + mliteracy_1897 + UrbanShare1883 +AvgRecrHeight + serfperc1 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + gub_name, data = fam13)

#tax1 <- lm(TaxtoLand8890 ~ protestsAll1880s + sh_muslim_1870 + sh_nonOrthNonMusl1870, data = fam13) 
tax2 <- lm(TaxtoLand8890 ~ protestsAll1880s + lnrailDist + lndistStPetKm + AvgCommuneLandPP + UrbanShare1883 + AvgRecrHeight + serfperc1 + AvgYields1883.87 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + gub_name, data = fam13)
tax3 <- lm(TaxtoLand8890 ~ atot + lnrailDist + lndistStPetKm + AvgCommuneLandPP + UrbanShare1883 + AvgRecrHeight + serfperc1 + AvgYields1883.87 + DvorsWithHorsesPct + nobilityper1000pop77 + blacksoil + ln_area + lat*long + gub_name, data = fam13)

protests <- texreg(
  l = list(prot1, prot2, prot3, myers2, myers3, tax2, tax3),
  override.se=list(coeftest(prot1, vcov.=hc1)[,2], coeftest(prot2, vcov.=hc1)[,2], coeftest(prot3, vcov.=hc1)[,2], coeftest(myers2, vcov.=hc1)[,2], coeftest(myers3, vcov.=hc1)[,2], coeftest(tax2, vcov.=hc1)[,2], coeftest(tax3, vcov.=hc1)[,2]), 
  override.pval=list(coeftest(prot1, vcov.=hc1)[,4], coeftest(prot2, vcov.=hc1)[,4], coeftest(prot3, vcov.=hc1)[,4], coeftest(myers2, vcov.=hc1)[,4], coeftest(myers3, vcov.=hc1)[,4], coeftest(tax2, vcov.=hc1)[,4], coeftest(tax3, vcov.=hc1)[,4]),      
  custom.model.names = c("(1)","(2)","(3)","(4)", "(5)", "(6)", "(7)"),
  custom.coef.names=c("Share Muslim", "Share other non-Orthodox", "Protests in the 1880s", "Protests in 1851-63"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:unrest1",
  omit.coef = "(Intercept)|gub_name|area|lat|long|lnrailDist|captPerArea|mliteracy_1897|AvgYields1883.87|AvgCommuneLandPP|UrbanShare1883|AvgRecrHeight|lndistStPetKm|serfperc1|DvorsWithHorsesPct|blacksoil|ln_area|TotPop|nobilityper1000pop77|meanPrevHarv|est_current1885",
  include.rsquared = FALSE, 
  include.rmse=FALSE
)

### Arson data for correlations reported in the Appendix: 

arsonAllGub<-read.dta("arsonGub.dta")

cor.test(arsonAllGub$rur_arson1891.92, arsonAllGub$ShareMuslim) #-0.04
cor.test(arsonAllGub$rur_arson1891.92, arsonAllGub$ShareNonOrthodox1870) #-0.28


